World University Rankings

Open necessary datasets

cwurData <- read.csv("cwurData.csv")
educationExpenditure <- read.csv("education_expenditure_supplementary_data.csv")
educationalAttainment <- read.csv("educational_attainment_supplementary_data.csv")
schoolCountry <- read.csv("school_and_country_table.csv")
shanghaiData <- read.csv("shanghaiData.csv")
timesData <- read.csv("timesData.csv")

Getting basic information from tables

Central World University Rankings

knitr::kable(head(cwurData,10), caption = "Central World University Rankings information (first 10 rows)")
Central World University Rankings information (first 10 rows)
world_rank institution country national_rank quality_of_education alumni_employment quality_of_faculty publications influence citations broad_impact patents score year
1 Harvard University USA 1 7 9 1 1 1 1 NA 5 100.00 2012
2 Massachusetts Institute of Technology USA 2 9 17 3 12 4 4 NA 1 91.67 2012
3 Stanford University USA 3 17 11 5 4 2 2 NA 15 89.50 2012
4 University of Cambridge United Kingdom 1 10 24 4 16 16 11 NA 50 86.17 2012
5 California Institute of Technology USA 4 2 29 7 37 22 22 NA 18 85.21 2012
6 Princeton University USA 5 8 14 2 53 33 26 NA 101 82.50 2012
7 University of Oxford United Kingdom 2 13 28 9 15 13 19 NA 26 82.34 2012
8 Yale University USA 6 14 31 12 14 6 15 NA 66 79.14 2012
9 Columbia University USA 7 23 21 10 13 12 14 NA 5 78.86 2012
10 University of California, Berkeley USA 8 16 52 6 6 5 3 NA 16 78.55 2012

Shanghai Rankings

knitr::kable(head(shanghaiData,10),caption="Shanghai Ranking information (first 10 rows)")
Shanghai Ranking information (first 10 rows)
world_rank university_name national_rank total_score alumni award hici ns pub pcp year
1 Harvard University 1 100.0 100.0 100.0 100.0 100.0 100.0 72.4 2005
2 University of Cambridge 1 73.6 99.8 93.4 53.3 56.6 70.9 66.9 2005
3 Stanford University 2 73.4 41.1 72.2 88.5 70.9 72.3 65.0 2005
4 University of California, Berkeley 3 72.8 71.8 76.0 69.4 73.9 72.2 52.7 2005
5 Massachusetts Institute of Technology (MIT) 4 70.1 74.0 80.6 66.7 65.8 64.3 53.0 2005
6 California Institute of Technology 5 67.1 59.2 68.6 59.8 65.8 52.5 100.0 2005
7 Columbia University 6 62.3 79.4 60.6 56.1 54.2 69.5 45.4 2005
8 Princeton University 7 60.9 63.4 76.8 60.9 48.7 48.5 59.1 2005
9 University of Chicago 8 60.1 75.6 81.9 50.3 44.7 56.4 42.2 2005
10 University of Oxford 2 59.7 64.3 59.1 48.4 55.6 68.4 53.2 2005

Education attainment

knitr::kable(head(educationalAttainment,10),caption="Education attainment information (first 10 rows)")
Education attainment information (first 10 rows)
country_name series_name X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012 X2013 X2015
Afghanistan Barro-Lee: Average years of primary schooling, age 15+, female 0.33 NA NA 0.44 NA NA NA 0.57 NA NA NA NA 0.75 NA NA NA NA 0.86 NA NA NA NA 1.27 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15+, total 1.03 NA NA 1.26 NA NA NA 1.54 NA NA NA NA 2.01 NA NA NA NA 2.18 NA NA NA NA 2.64 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15-19, female 0.83 NA NA 0.95 NA NA NA 1.26 NA NA NA NA 1.92 NA NA NA NA 1.01 NA NA NA NA 2.45 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15-19, total 2.34 NA NA 2.22 NA NA NA 2.37 NA NA NA NA 3.83 NA NA NA NA 2.26 NA NA NA NA 3.55 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 20-24, female 0.54 NA NA 0.92 NA NA NA 0.94 NA NA NA NA 1.26 NA NA NA NA 2.00 NA NA NA NA 1.29 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 20-24, total 1.52 NA NA 2.51 NA NA NA 2.27 NA NA NA NA 2.48 NA NA NA NA 3.93 NA NA NA NA 2.64 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25+, female 0.17 NA NA 0.25 NA NA NA 0.37 NA NA NA NA 0.48 NA NA NA NA 0.63 NA NA NA NA 0.81 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25+, total 0.66 NA NA 0.85 NA NA NA 1.14 NA NA NA NA 1.38 NA NA NA NA 1.69 NA NA NA NA 2.19 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25-29, female 0.44 NA NA 0.54 NA NA NA 0.92 NA NA NA NA 0.94 NA NA NA NA 1.26 NA NA NA NA 1.92 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25-29, total 1.28 NA NA 1.52 NA NA NA 2.51 NA NA NA NA 2.27 NA NA NA NA 2.48 NA NA NA NA 3.93 NA NA NA NA

Education expenditure

knitr::kable(head(educationExpenditure,10),caption="Education expenditure information (first 10 rows)")
Education expenditure information (first 10 rows)
country institute_type direct_expenditure_type X1995 X2000 X2005 X2009 X2010 X2011
OECD Average All Institutions Public 4.9 4.9 5.0 5.4 5.4 5.3
Australia All Institutions Public 4.5 4.6 4.3 4.5 4.6 4.3
Austria All Institutions Public 5.3 5.4 5.2 5.7 5.6 5.5
Belgium All Institutions Public 5.0 5.1 5.8 6.4 6.4 6.4
Canada All Institutions Public 5.8 5.2 4.8 5.0 5.2 NA
Chile All Institutions Public NA 4.2 3.3 4.1 4.3 3.9
Czech Republic All Institutions Public 4.8 4.2 4.1 4.2 4.1 4.4
Denmark All Institutions Public 6.5 6.4 6.8 7.5 7.6 7.5
Estonia All Institutions Public NA NA 4.7 5.9 5.6 5.2
Finland All Institutions Public 6.6 5.5 5.9 6.3 6.4 6.3

Times Education Rankings

knitr::kable(head(timesData,10),caption="Times Higher Education World University Rankings data information (first 10 rows)")
Times Higher Education World University Rankings data information (first 10 rows)
world_rank university_name country teaching international research citations income total_score num_students student_staff_ratio international_students female_male_ratio year
1 Harvard University United States of America 99.7 72.4 98.7 98.8 34.5 96.1 20,152 8.9 25% 2011
2 California Institute of Technology United States of America 97.7 54.6 98.0 99.9 83.7 96.0 2,243 6.9 27% 33 : 67 2011
3 Massachusetts Institute of Technology United States of America 97.8 82.3 91.4 99.9 87.5 95.6 11,074 9.0 33% 37 : 63 2011
4 Stanford University United States of America 98.3 29.5 98.1 99.2 64.3 94.3 15,596 7.8 22% 42 : 58 2011
5 Princeton University United States of America 90.9 70.3 95.4 99.9 - 94.2 7,929 8.4 27% 45 : 55 2011
6 University of Cambridge United Kingdom 90.5 77.7 94.1 94.0 57.0 91.2 18,812 11.8 34% 46 : 54 2011
6 University of Oxford United Kingdom 88.2 77.2 93.9 95.1 73.5 91.2 19,919 11.6 34% 46 : 54 2011
8 University of California, Berkeley United States of America 84.2 39.6 99.3 97.8 - 91.1 36,186 16.4 15% 50 : 50 2011
9 Imperial College London United Kingdom 89.2 90.0 94.5 88.3 92.9 90.6 15,060 11.7 51% 37 : 63 2011
10 Yale University United States of America 92.1 59.2 89.7 91.5 - 89.5 11,751 4.4 20% 50 : 50 2011

School & Country

knitr::kable(head(schoolCountry,10),caption="School & country information (first 10 rows)")
School & country information (first 10 rows)
school_name country
Harvard University United States of America
California Institute of Technology United States of America
Massachusetts Institute of Technology United States of America
Stanford University United States of America
Princeton University United States of America
University of Cambridge United Kingdom
University of Oxford United Kingdom
University of California, Berkeley United States of America
Imperial College London United Kingdom
Yale University United States of America
glimpse(cwurData)
## Rows: 2,200
## Columns: 14
## $ world_rank           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ institution          <chr> "Harvard University", "Massachusetts Institute of~
## $ country              <chr> "USA", "USA", "USA", "United Kingdom", "USA", "US~
## $ national_rank        <int> 1, 2, 3, 1, 4, 5, 2, 6, 7, 8, 9, 10, 11, 1, 12, 1~
## $ quality_of_education <int> 7, 9, 17, 10, 2, 8, 13, 14, 23, 16, 15, 21, 31, 3~
## $ alumni_employment    <int> 9, 17, 11, 24, 29, 14, 28, 31, 21, 52, 26, 42, 16~
## $ quality_of_faculty   <int> 1, 3, 5, 4, 7, 2, 9, 12, 10, 6, 8, 14, 24, 31, 20~
## $ publications         <int> 1, 12, 4, 16, 37, 53, 15, 14, 13, 6, 34, 22, 9, 8~
## $ influence            <int> 1, 4, 2, 16, 22, 33, 13, 6, 12, 5, 20, 21, 10, 19~
## $ citations            <int> 1, 4, 2, 11, 22, 26, 19, 15, 14, 3, 28, 16, 8, 23~
## $ broad_impact         <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ patents              <int> 5, 1, 15, 50, 18, 101, 26, 66, 5, 16, 101, 10, 9,~
## $ score                <dbl> 100.00, 91.67, 89.50, 86.17, 85.21, 82.50, 82.34,~
## $ year                 <int> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2~

Review and reveal interesting facts

cwurData %>% group_by(year) %>% 
  select(year,institution,world_rank) %>% top_n(-5, wt = world_rank) -> cwurTop5

plot_ly(cwurTop5, x = ~year) %>%
  add_trace(y = cwurTop5$world_rank, name = cwurTop5$institution, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= cwurTop5$institution) %>%
  layout(title="World Ranked Universities by CWUR (2012-2015)",
         xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
         yaxis = list(title = "World rank"),
         hovermode = 'compare')
cwurPlotYear <- function(nYear) {
  cwurData %>% filter(year==nYear) %>% top_n(10,-world_rank) %>% 
  ggplot(aes(x=reorder(institution,-world_rank), y=world_rank)) + geom_bar(stat="identity", aes(fill=reorder(institution,-world_rank)), colour="black") +
    theme_bw() + coord_flip() +  scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
    labs(x="Institution", y="World Rank", 
        title=paste("Rank in ",nYear), subtitle="(smaller value is better)")
}
cwurPlotYear(2012) -> d1
cwurPlotYear(2013) -> d2
cwurPlotYear(2014) -> d3
cwurPlotYear(2015) -> d4
grid.arrange(d1,d2,d3,d4, ncol=2)

cwurData %>% group_by(country) %>% summarise(n = length(publications)) %>% top_n(10,n) %>% ungroup() -> c
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=publications, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by publication", 
      title="Rank by publication", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d1
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by citations", 
      title="Rank by citations", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d2
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=patents, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by patents", 
      title="Rank by patents", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d3
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_education, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by quality of education", 
      title="Rank by quality of education", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d4
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=alumni_employment, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by alumni employment", 
      title="Rank by alumni employment", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d5
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_faculty, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by quality of faculty", 
      title="Rank by quality of faculty", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)

cwurData %>% group_by(country,year) %>% 
  summarise(nr = length(world_rank), minw=min(world_rank), maxw=max(world_rank), avgw=round(mean(world_rank),0)) %>%
  select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ccwur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
ccwur$hover <- with(ccwur, 
        paste("Country: ", country, '<br>', 
              "Year: ",year, "<br>",
              "Universities in top: ", nr, "<br>",
              "Min rank in top: ", minw, "<br>",
              "Max rank in top: ", maxw, "<br>",
              "Mean rank in top: ", avgw,"<br>"
              ))
g <- list(
  showframe = TRUE,
  showcoastlines = TRUE,
  projection = list(type = 'orthogonal')
)
plot_geo(ccwur, locationmode = 'country names') %>%
  add_trace(
    z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
    text = ~hover, locations=~country, marker = list(line = l)
  ) %>%
  colorbar(title = 'Number of\nuniversities in top', tickprefix = '') %>%
  layout(
    title = with(ccwur, paste('Number of universities in top<br>Source:<a href="http://cwur.org/">Council of World University Ranking</a>')),
    geo = g
  )

Check Shanghai Data

shanghaiDataCld = shanghaiData
shanghaiDataCld$t_score = 
  0.1 * shanghaiDataCld$alumni + 0.2 * shanghaiDataCld$award + 0.2 * shanghaiDataCld$hici + 
  0.2 * shanghaiDataCld$ns + 0.2 * shanghaiDataCld$pub + 0.1 * shanghaiDataCld$pcp
shanghaiDataCld$total_score[is.na(shanghaiDataCld$total_score)] = shanghaiDataCld$t_score[is.na(shanghaiDataCld$total_score)]
shanghaiDataCld = shanghaiDataCld[complete.cases(shanghaiDataCld),]
#Fix the duplicate name for University of California-Berkeley
shanghaiDataCld$university_name[shanghaiDataCld$university_name=="University of California-Berkeley"] <- "University of California, Berkeley"
shanghaiDataCld %>% group_by(year) %>% 
  top_n(10, wt = total_score) %>% select(year,university_name,total_score,alumni, award, hici, ns, pub, pcp) %>% ungroup() -> top10univ
 
 #draw with plotly
 
plot_ly(top10univ, x = ~year) %>%
  add_trace(y = top10univ$total_score, name = top10univ$university_name, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= top10univ$university_name) %>%
  layout(title="Shanghai (ARWU) World Ranks (2005-2015)<br>Best ranked universities based on total score", legend = list(orientation = 'h'),
         xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
         yaxis = list(title = "Total score"),
         hovermode = 'compare')

Shanghai Top Universities by Year

top10SpiderWebYear <- function(nYear) {
    top10univ %>% filter(year==nYear) %>% ungroup() -> top10u
    top10 <- as.data.frame(cbind(top10u[,c(3,4,5,6,7,8,9)]))
    colnames(top10) <- c("Total Score", "Alumni with Nobel", "Awarded Nobel", "Highly Cited", 
                         "Nature&Science", "Publications", "PCAP")
    rownames(top10) <- top10u$university_name
    rmin <- apply(top10,2,min); rmax <- apply(top10,2,max)
     rmax <- 100
     rmin <- 0
    colors_border=c( "tomato", "blue", "gold", "green", "magenta", 
                 "yellow", "grey", "lightblue", "brown", "red", "lightgreen", "cyan" )
    par(mfrow=c(4,3))
    par(mar=c(1,1,5,1))
    for(i in 1:nrow(top10)){
      colorValue<-(col2rgb(as.character(colors_border[i]))%>% as.integer())/255
      radarchart(rbind(rmax,rmin,top10[i,]),
         axistype=2 , 
         pcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 1),
         pfcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 0.5),
         plwd=1 , plty=1,cglcol="grey", cglty=1, axislabcol="grey", cglwd=0.5,vlcex=0.7, 
         title=rownames(top10[i,]))
    }
    title(paste0('\nShanghai World University  Rankings top 10 (',nYear,')'),outer=TRUE,col.main='black',cex.main=1.5)
}

Shanghai World University Rankings by Year

2005

top10SpiderWebYear(2005)

2006

top10SpiderWebYear(2006)

2007

top10SpiderWebYear(2007)

2008

top10SpiderWebYear(2008)

2009

top10SpiderWebYear(2009)

2010

top10SpiderWebYear(2010)

2011

top10SpiderWebYear(2011)

2012

top10SpiderWebYear(2012)

2013

top10SpiderWebYear(2013)

2014

top10SpiderWebYear(2014)

2015

top10SpiderWebYear(2015)

Rankings by All Countries

merge(shanghaiDataCld,schoolCountry, by.x="university_name", by.y="school_name") -> scData

scData %>% group_by(country) %>% summarise(n = length(alumni)) %>% top_n(10,n) %>% ungroup() -> cs

scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=alumni, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Alumni with Nobel (score)", 
      title="Alumni with Nobel (score)", subtitle="Grouped by country") -> d1
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=award, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Nobel awards score", 
      title="Nobel awards score", subtitle="Grouped by country") -> d2
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=hici, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Highly cited score", 
      title="Highly cited score", subtitle="Grouped by country") -> d3
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=ns, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Nature & Science publications score", 
      title="Nature & Science publications score", subtitle="Grouped by country") -> d4
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=pub, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Publications score", 
      title="Publications score", subtitle="Grouped by country") -> d5
scData %>% filter(country %in% cs$country) %>%
ggplot(aes(x=country, y=pcp, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Per capita performance score", 
      title="Per capita performance score", subtitle="Grouped by country") -> d6

grid.arrange(d1,d2,d3, d4, d5, d6, ncol=2)

scData %>%  group_by(country, year) %>% 
summarise(nr = length(total_score), minw=min(total_score), maxw=max(total_score), avgw=round(mean(total_score),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> swur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
swur$hover <- with(swur, 
      paste("Country: ", country, '<br>', 
            "Year: ",year, "<br>",
            "Universities: ", nr, "<br>",
            "Min total score: ", minw, "<br>",
            "Max total score: ", maxw, "<br>",
            "Mean total score: ", avgw,"<br>"
            ))
# specify map projection/options
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'Mercator')
)
plot_geo(swur, locationmode = 'country names') %>%
add_trace(
  z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
  text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities', tickprefix = '') %>%
layout(
  title = with(swur, paste('Number of universities<br>Source:<a href="http://www.shanghairanking.com">Shanghai Academic World University Rankings</a>')),
  geo = g
)

Times Higher Education University Rankings

#replace first the missing values (`-`) with NA
timesData$teaching[timesData$teaching=='-'] <- NA
timesData$international[timesData$international=='-'] <- NA
timesData$research[timesData$research=='-'] <- NA
timesData$citations[timesData$citations=='-'] <- NA
timesData$income[timesData$income=='-'] <- NA
timesData$total_score[timesData$total_score=='-'] <- NA

#replace factors with numeric
timesData$teaching <- as.numeric(as.character(timesData$teaching))
timesData$international <- as.numeric(as.character(timesData$international))
timesData$research <- as.numeric(as.character(timesData$research))
timesData$citations <- as.numeric(as.character(timesData$citations))
timesData$income <- as.numeric(as.character(timesData$income))
timesData$total_score <- as.numeric(as.character(timesData$total_score))

# replace NAs with 0
timesData$income[is.na(timesData$income)] <- 0
timesData$international[is.na(timesData$international)] <- 0

#calculate the total score
timesData$t_score = 
  0.3 * as.numeric(as.character(timesData$teaching)) + 
  0.075 * as.numeric(as.character(timesData$international)) + 
  0.3 * as.numeric(as.character(timesData$research)) + 
  0.3 * as.numeric(as.character(timesData$citations)) + 
  0.025 * as.numeric(as.character(timesData$income))
  
#replace the total_score where missing with the calculated value
timesData$total_score[is.na(timesData$total_score)] <- timesData$t_score[is.na(timesData$total_score)]
timesData$wr = as.numeric(as.character(timesData$world_rank))
## Warning: в результате преобразования созданы NA
thePlotYear <- function(nYear) {
  timesData %>% filter(year==nYear) %>% top_n(10,-wr) %>% 
  ggplot(aes(x=reorder(university_name,-wr), y=wr)) + geom_bar(stat="identity", aes(fill=reorder(university_name,-wr)), colour="black") +
    theme_bw() + coord_flip() +  scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
    labs(x="University name", y="World Rank", 
        title=paste("Rank in ",nYear), subtitle="(smaller value is better)") 
}
thePlotYear(2011) -> d1
thePlotYear(2012) -> d2
thePlotYear(2013) -> d3
thePlotYear(2014) -> d4
thePlotYear(2015) -> d5
thePlotYear(2016) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)

Ranks by Year

timesData %>% group_by(year) %>% 
  top_n(10, wt = total_score) %>% 
  select(year,university_name,total_score,teaching, international, research, citations, income) %>% ungroup() -> top10univ
theTop10SpiderWebYear <- function(nYear) {
    top10univ %>% filter(year==nYear) %>% ungroup() -> top10u
    top10 <- as.data.frame(cbind(top10u[,c(3,4,5,6,7,8)]))
    colnames(top10) <- c("Total Score", "Teaching", "International Outlook", "Research", 
                          "Citations","Industry Income")
    rownames(top10) <- top10u$university_name
    
    rmin <- apply(top10,2,min); rmax <- apply(top10,2,max)
     rmax <- 100
     rmin <- 0
  
    colors_border=c( "tomato", "blue", "gold", "green", "magenta", 
                 "yellow", "grey", "lightblue", "brown", "red", "lightgreen", "cyan" )

    par(mfrow=c(4,3))
    par(mar=c(1,1,5,1))
    for(i in 1:nrow(top10)){
      colorValue<-(col2rgb(as.character(colors_border[i]))%>% as.integer())/255
      radarchart(rbind(rmax,rmin,top10[i,]),
         axistype=2 , 
         pcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 1),
         pfcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 0.5),
         plwd=1 , plty=1,cglcol="grey", cglty=1, axislabcol="grey", cglwd=0.5,vlcex=0.7, 
         title=rownames(top10[i,]))
    }
    title(paste0('\nTimes Higher Education  World University Rankings top 10 (',nYear,')'),outer=TRUE,col.main='black',cex.main=1.5)
}

2011

theTop10SpiderWebYear(2011)

2012

theTop10SpiderWebYear(2012)

2013

theTop10SpiderWebYear(2013)

2014

theTop10SpiderWebYear(2014)

2015

theTop10SpiderWebYear(2015)

2016

theTop10SpiderWebYear(2016)

All countries total scores and counts

timesData %>% group_by(country) %>% summarise(n = length(teaching)) %>% top_n(10,n) %>% ungroup() -> ct
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=teaching, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Teaching score", 
      title="Teaching score", subtitle="Grouped by country") -> d1
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=international, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="International outlook score", 
      title="International outlook score", subtitle="Grouped by country") -> d2
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=research, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Research score", 
      title="Research score", subtitle="Grouped by country") -> d3
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Citations score", 
      title="Citations score", subtitle="Grouped by country") -> d4
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=income, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Industry income score", 
      title="Industry income score", subtitle="Grouped by country") -> d5
timesData %>% filter(country %in% ct$country) %>%
ggplot(aes(x=country, y=total_score, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Total score", 
      title="Total score", subtitle="Grouped by country") -> d6
grid.arrange(d1,d2,d3,d4, d5, d6, ncol=2)

timesData$total_score = as.numeric(as.character(timesData$total_score))
#replace with 0 the missing total_score values - this will affect the aggregated values

timesData %>% group_by(country,year) %>% 
summarise(nr = length(total_score), minw=min(total_score), maxw=max(total_score), avgw=round(mean(total_score),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ther

# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)

ther$hover <- with(ther, 
      paste("Country: ", country, '<br>', 
            "Year: ",year, "<br>",
            "Universities: ", nr, "<br>",
            "Min total score: ", minw, "<br>",
            "Max total score: ", maxw, "<br>",
            "Mean total score: ", avgw,"<br>"
            ))
# specify map projection/options
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'Mercator')
)
plot_geo(ther, locationmode = 'country names') %>%
add_trace(
  z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
  text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities', tickprefix = '') %>%
layout(
  title = with(ther, paste('Number of universities<br>Source:<a href="https://www.timeshighereducation.com/world-university-rankings">Times Higher Education World University Ranking</a>')),
  geo = g
)